library(tidyverse) # for data cleaning and plotting
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(googlesheets4) # for reading googlesheet data
library(lubridate) # for date manipulation
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(openintro) # for the abbr2state() function
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(palmerpenguins)# for Palmer penguin data
library(maps) # for map data
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(ggmap) # for mapping points on maps
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(gplots) # for col2hex() function
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(RColorBrewer) # for color palettes
library(sf) # for working with spatial data
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(leaflet) # for highly customizable mapping
library(ggthemes) # for more themes (including theme_map())
library(plotly) # for the ggplotly() - basic interactivity
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
##
## wind
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gganimate) # for adding animation layers to ggplots
library(transformr) # for "tweening" (gganimate)
##
## Attaching package: 'transformr'
## The following object is masked from 'package:sf':
##
## st_normalize
library(gifski) # need the library for creating gifs but don't need to load each time
library(shiny) # for creating interactive apps
gs4_deauth() # To not have to authorize each time you knit.
theme_set(theme_minimal())
library(ggridges)
# SNCF Train data
# Tidy Tuesday file dated 2019-02-26. Per the column specification printed
# below, it has year/month, service, departure and arrival station, journey
# time, delay averages, late-train counts, and a delay cause column.
small_trains <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-02-26/small_trains.csv")
##
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## year = col_double(),
## month = col_double(),
## service = col_character(),
## departure_station = col_character(),
## arrival_station = col_character(),
## journey_time_avg = col_double(),
## total_num_trips = col_double(),
## avg_delay_all_departing = col_double(),
## avg_delay_all_arriving = col_double(),
## num_late_at_departure = col_double(),
## num_arriving_late = col_double(),
## delay_cause = col_character(),
## delayed_number = col_double()
## )
# Lisa's garden data
# Read from a Google Sheet (gs4_deauth() is called above so no sign-in prompt
# appears); the `date` column is then passed through lubridate's ymd().
garden_harvest <- read_sheet("https://docs.google.com/spreadsheets/d/1DekSazCzKqPS2jnGhKue7tLxRU3GVL1oxi-4bEM5IWw/edit?usp=sharing") %>%
mutate(date = ymd(date))
## Reading from "2020_harvest"
## Range "Sheet1"
# Lisa's Mallorca cycling data
# Keep only position, elevation, timestamp and speed. The columns are selected
# by name (they are the first four in the column specification printed below)
# rather than by position `1:4`, so a change in the CSV's column order cannot
# silently select different variables.
mallorca_bike_day7 <- read_csv("https://www.dropbox.com/s/zc6jan4ltmjtvy0/mallorca_bike_day7.csv?dl=1") %>%
  select(lon, lat, ele, time, speed)
##
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## lon = col_double(),
## lat = col_double(),
## ele = col_double(),
## time = col_datetime(format = ""),
## extensions = col_double(),
## ele.num = col_double(),
## date = col_date(format = ""),
## hrminsec = col_datetime(format = ""),
## time_hr = col_double(),
## dist_km = col_double(),
## speed = col_double()
## )
# Heather Lendway's Ironman 70.3 Pan Am championships Panama data
# Swim leg GPS track. Note from the column specification below that `ele` is
# read as logical here (presumably because it is entirely missing - confirm).
panama_swim <- read_csv("https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_swim_20160131.csv")
##
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## lon = col_double(),
## lat = col_double(),
## time = col_datetime(format = ""),
## extensions = col_double(),
## ele = col_logical(),
## event = col_character(),
## date = col_date(format = ""),
## hrminsec = col_datetime(format = "")
## )
# Bike leg GPS track from the same repository; `ele` is read as double here
# (see the column specification below).
panama_bike <- read_csv("https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_bike_20160131.csv")
##
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## lon = col_double(),
## lat = col_double(),
## ele = col_double(),
## time = col_datetime(format = ""),
## extensions = col_double(),
## event = col_character(),
## date = col_date(format = ""),
## hrminsec = col_datetime(format = "")
## )
# Run leg GPS track from the same repository; columns match the bike file
# (see the column specification below).
panama_run <- read_csv("https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_run_20160131.csv")
##
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## lon = col_double(),
## lat = col_double(),
## ele = col_double(),
## time = col_datetime(format = ""),
## extensions = col_double(),
## event = col_character(),
## date = col_date(format = ""),
## hrminsec = col_datetime(format = "")
## )
# COVID-19 data from the New York Times
# Columns: date, state, fips, cases, deaths. `fips` is read as character
# (see the column specification below), not as a number.
covid19 <- read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv")
##
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## date = col_date(format = ""),
## state = col_character(),
## fips = col_character(),
## cases = col_double(),
## deaths = col_double()
## )
Go here or to previous homework to remind yourself how to get set up.
Once your repository is created, you should always open your project rather than just opening an .Rmd file. You can do that either by clicking on the .Rproj file in your repository folder on your computer or by going to the upper right-hand corner in RStudio and clicking the arrow next to where it says Project: (None). You should see your project come up in that list if you’ve used it recently. You could also go to File –> Open Project and navigate to your .Rproj file.
Put your name at the top of the document.
For ALL graphs, you should include appropriate labels.
Feel free to change the default theme, which I currently have set to theme_minimal().
Use good coding practice. Read the short sections on good code with pipes and ggplot2. This is part of your grade!
NEW!! With animated graphs, add eval=FALSE to the code chunk that creates the animation and saves it using anim_save(). Add another code chunk to reread the gif back into the file. See the tutorial for help.
When you are finished with ALL the exercises, uncomment the options at the top so your document looks nicer. Don’t do it before then, or else you might miss some important warnings and messages.
ggplotly() function.
# Tidy Tuesday IKEA data (file dated 2020-11-03). The CSV's first column has no
# header, so readr names it `X1` (see the warning printed below).
ikea <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-11-03/ikea.csv")
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
## X1 = col_double(),
## item_id = col_double(),
## name = col_character(),
## category = col_character(),
## price = col_double(),
## old_price = col_character(),
## sellable_online = col_logical(),
## link = col_character(),
## other_colors = col_character(),
## short_description = col_character(),
## designer = col_character(),
## depth = col_double(),
## height = col_double(),
## width = col_double()
## )
# Designer values that contain the character "0" are treated as invalid and
# excluded below (presumably they are scraped text rather than a designer
# name - TODO confirm against the data).
new_ikea <- ikea %>%
  filter(grepl("0", designer, fixed = TRUE)) %>%
  select(designer)

# Items with a usable designer, reduced to the columns the plot needs, sorted
# by price, with incomplete rows dropped.
#
# NOTE: the previous version also applied `filter(price > "50000.0")`. Because
# the right-hand side is a string, R coerced `price` to character and compared
# the values as text, so rows were kept or dropped by how their digits sort,
# not by size. That filter is removed; the "most expensive" selection is done
# numerically by the top-100 cut below.
proper_ikea <- ikea %>%
  filter(!(designer %in% new_ikea$designer)) %>%
  select(price, category, designer) %>%
  arrange(price) %>%
  na.omit()

# The 100 most expensive remaining items, highest price first.
top_priceIkea <- proper_ikea %>%
  arrange(desc(price)) %>%
  head(n = 100)
# Box plot of price by category for the top-priced items, with categories
# ordered by price; the x axis title and tick labels are hidden because the
# fill legend already names each category.
top_ikea_graph <- top_priceIkea %>%
  arrange(price) %>%
  mutate(category = fct_reorder(category, price)) %>%
  ggplot(aes(x = category, y = price, fill = category)) +
  geom_boxplot(color = "darkgray") +
  labs(
    title = "Most Expensive Appliances at Ikea",
    subtitle = "Macalester Intro to Data science Tidy Tuesday",
    y = "Recent Price in Saudi Riyals",
    fill = "Appliance Type"
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, face = "italic")
  )

# Interactive version of the static plot.
ggplotly(top_ikea_graph, tooltip = c("text", "x"))
# `garden_harvest` is already read (with `date` parsed) in the data-loading
# section near the top of this document, so the Google Sheet is not downloaded
# a second time here.

# Stacked columns of lettuce harvest weight per date, filled by variety.
garden_graph <- garden_harvest %>%
  filter(vegetable == "lettuce") %>%
  ggplot(aes(x = date, y = weight, fill = variety)) +
  geom_col(color = "black") +
  # One manual colour per lettuce variety.
  scale_fill_manual(
    values = c("gold", "dark green", "red", "light blue", "purple")
  ) +
  labs(
    title = "Distribution of lettuce harvests",
    x = "Date",
    y = "Weight in grams",
    caption = "Data from Lisa Lendway"
  ) +
  theme_dark() +
  theme(legend.position = "bottom")

# Interactive version of the static plot.
ggplotly(garden_graph, tooltip = c("text", "x"))
small_trains dataset that contains data from the SNCF (National Society of French Railways). These are Tidy Tuesday data! Read more about it here.
# Animated ridgeline plot of late departures by service, one state per year.
#
# The service/station filter used to be a separate pipeline whose result was
# never assigned, so the plot silently used the unfiltered data. The filter is
# now part of the plotting pipeline. The former `group_by(service)` had no
# effect on these filters and is dropped.
saved_trains <- small_trains %>%
  filter(
    !is.na(service),
    departure_station %in% c("ZURICH", "PARIS LYON")
  ) %>%
  ggplot(aes(x = num_late_at_departure, y = service)) +
  geom_density_ridges() +
  transition_states(year) +
  labs(
    title = "Departing Delay by French Train Services",
    subtitle = "Moving to {next_state}",
    # The plotted variable is `num_late_at_departure` (a number of trains), not
    # an average delay in minutes, so the axis label now says so.
    x = "Number of Trains Late at Departure",
    y = "Train Service"
  )

# Render the animation and write it to disk.
anim_save("trains2.gif", saved_trains)
geom_area() examples here). You will look at cumulative harvest of tomato varieties over time. You should do the following: garden_harvest data, filter the data to the tomatoes and find the daily harvest in pounds for each variety. fct_reorder()) from most to least harvested (most on the bottom). I have started the code for you below. The complete() function creates a row for all unique date/variety combinations. If a variety is not harvested on one of the harvest dates in the dataset, it is filled with a value of 0.
# Animated stacked-area plot of cumulative tomato harvest (lb) per variety.
gardenHarv_ani <- garden_harvest %>%
  filter(vegetable == "tomatoes") %>%
  # One row per variety for every calendar day in the harvest period; days on
  # which a variety was not harvested get weight 0.
  complete(
    variety,
    date = seq.Date(min(date), max(date), by = "day")
  ) %>%
  select(-c(vegetable, units)) %>%
  mutate(weight = replace_na(weight, 0)) %>%
  group_by(variety, date) %>%
  # 0.00220462 converts the daily total to pounds (assumes `weight` is in
  # grams, as labelled elsewhere in this document).
  summarize(
    daily_harvest_lb = sum(weight) * 0.00220462,
    .groups = "drop"
  ) %>%
  # Running total within each variety. The grouping is explicit instead of
  # relying on what summarize() happens to leave behind; rows are in date
  # order within each variety after the summary above.
  group_by(variety) %>%
  mutate(cumsum_daily_harvest_lb = cumsum(daily_harvest_lb)) %>%
  ungroup() %>%
  # Order varieties by total harvest, ascending. position_stack() puts the
  # first level on top, so the most-harvested variety ends up at the bottom.
  mutate(
    variety = fct_reorder(variety, cumsum_daily_harvest_lb, .fun = max)
  ) %>%
  select(-daily_harvest_lb) %>%
  ggplot() +
  geom_area(
    aes(x = date, y = cumsum_daily_harvest_lb, fill = variety),
    position = position_stack()
  ) +
  transition_reveal(date) +
  labs(
    title = "Cumulative Tomato Harvest Variety by Time",
    subtitle = "Moving to {frame_along}",
    x = "Date",
    y = "Cumulative Daily Harvest in Lb"
  )

gardenHarv_ani
anim_save("GardenHarv.gif", gardenHarv_ani)
mallorca_bike_day7 bike ride using animation! Requirements: ggmap. ggimage package and geom_image to add a bike image instead of a red point. You can use this image. See here for an example.
# Terrain basemap covering the ride. `bbox` gives the map edges as
# longitude (left/right) and latitude (bottom/top).
mallorca_map <- get_stamenmap(
  bbox = c(left = 2.28, bottom = 39.41, right = 3.03, top = 39.8),
  maptype = "terrain",
  zoom = 11
)
## Source : http://tile.stamen.com/terrain/11/1036/776.png
## Source : http://tile.stamen.com/terrain/11/1037/776.png
## Source : http://tile.stamen.com/terrain/11/1038/776.png
## Source : http://tile.stamen.com/terrain/11/1039/776.png
## Source : http://tile.stamen.com/terrain/11/1040/776.png
## Source : http://tile.stamen.com/terrain/11/1041/776.png
## Source : http://tile.stamen.com/terrain/11/1036/777.png
## Source : http://tile.stamen.com/terrain/11/1037/777.png
## Source : http://tile.stamen.com/terrain/11/1038/777.png
## Source : http://tile.stamen.com/terrain/11/1039/777.png
## Source : http://tile.stamen.com/terrain/11/1040/777.png
## Source : http://tile.stamen.com/terrain/11/1041/777.png
## Source : http://tile.stamen.com/terrain/11/1036/778.png
## Source : http://tile.stamen.com/terrain/11/1037/778.png
## Source : http://tile.stamen.com/terrain/11/1038/778.png
## Source : http://tile.stamen.com/terrain/11/1039/778.png
## Source : http://tile.stamen.com/terrain/11/1040/778.png
## Source : http://tile.stamen.com/terrain/11/1041/778.png
## Source : http://tile.stamen.com/terrain/11/1036/779.png
## Source : http://tile.stamen.com/terrain/11/1037/779.png
## Source : http://tile.stamen.com/terrain/11/1038/779.png
## Source : http://tile.stamen.com/terrain/11/1039/779.png
## Source : http://tile.stamen.com/terrain/11/1040/779.png
## Source : http://tile.stamen.com/terrain/11/1041/779.png
# Animated ride over the basemap: a blue point layer plus the path coloured by
# elevation, revealed along `time`, with the current time in the subtitle.
ggmap(mallorca_map) +
  geom_point(
    data = mallorca_bike_day7,
    mapping = aes(x = lon, y = lat),
    color = "blue",
    size = 0.5
  ) +
  geom_path(
    data = mallorca_bike_day7,
    mapping = aes(x = lon, y = lat, color = ele),
    size = 0.5
  ) +
  scale_color_viridis_c() +
  transition_reveal(time) +
  labs(
    title = "Lisa Mallorca Bike Trail",
    subtitle = "Time: {frame_along}"
  ) +
  # theme_map() is a complete theme, so the legend tweak must come after it.
  theme_map() +
  theme(legend.background = element_blank())